# Downloading, normalizing and plotting the data

library(pd.hta.2.0)
library(oligo)
celFiles <- list.celfiles()
affyRaw <- read.celfiles(celFiles)
## Reading in : 01-13.3577_(HTA-2_0).CEL
## Reading in : 02-13.3578_(HTA-2_0).CEL
## Reading in : 03-13.3580.2_(HTA-2_0).CEL
## Reading in : 04-13.3582_(HTA-2_0).CEL
## Reading in : 05-13.3623_(HTA-2_0).CEL
## Reading in : 06-13.3627.1_(HTA-2_0).CEL
## Reading in : 07-13.3633_(HTA-2_0).CEL
## Reading in : 08-13.6393.3_(HTA-2_0).CEL
## Reading in : 09-13.6394_(HTA-2_0).CEL
## Reading in : 10-13.12026.2_(HTA-2_0).CEL
## Reading in : 11-13.13283_(HTA-2_0).CEL
## Reading in : 12-16.082_(HTA-2_0).CEL
eset <- rma(affyRaw)
## Background correcting
## Normalizing
## Calculating Expression
boxplot(eset, col=cols)

hist(eset, col=cols)

fit <- fitProbeLevelModel(affyRaw)
NUSE(fit, las = 2, cex.axis = 0.35)

RLE(fit, las = 2, cex.axis = 0.35)

# Annotating the expression set

library(hta20sttranscriptcluster.db)
my_frame <- data.frame(exprs(eset))
x <- hta20sttranscriptclusterSYMBOL
mapped_probes <- mappedkeys(x)
xx <- as.list(x[mapped_probes])
universe <- unlist(xx)
write.table(universe, 'universe.txt', quote =F, sep ='\t',col.names = F, row.names = T)
universe2<-read.table("universe.txt", header = FALSE)
rownames(universe2)<-universe2[,1]
eset_array<-merge(universe2, my_frame, by=0)
dim(eset_array)
## [1] 23964    12
# Plotting raw images and scatter plots
library(affy)
## 
## Attaching package: 'affy'
## The following objects are masked from 'package:oligo':
## 
##     intensity, MAplot, mm, mm<-, mmindex, pm, pm<-, pmindex,
##     probeNames, rma
## The following object is masked from 'package:oligoClasses':
## 
##     list.celfiles
par(mfrow=c(2,6))
image(affyRaw[,1])

image(affyRaw[,2])

image(affyRaw[,3])

image(affyRaw[,4])

image(affyRaw[,5])

image(affyRaw[,6])

image(affyRaw[,7])

image(affyRaw[,8])

image(affyRaw[,9])

image(affyRaw[,10])

image(affyRaw[,11])

image(affyRaw[,12])

par(mfrow=c(1,1))
pairs(exprs(eset)[,1:6], pch=".",main="Scatter plots",col=cols) 

pairs(exprs(eset)[,7:12], pch=".",main="Scatter plots",col=cols) 

mva.pairs(exprs(eset)[,1:6],,col=cols,cex = 0.5)

mva.pairs(exprs(eset)[,7:12],,col=cols,cex = 0.5)

### Data clustering
library(ConsensusClusterPlus)

# Select the 5000 most variable probes with median absolute distance
eset_array2<-apply(eset_array,1,mad)
eset_array2<-eset_array[rev(order(eset_array2))[1:5000],]

# Agglomerative heirarchical clustering using Pearson correlation distance
eset_array2 <- sweep(eset_array2,1, apply(eset_array2,1,median,na.rm=T))

cluster <- ConsensusClusterPlus(eset_array2,
                                maxK=4,
                                reps=1000,
                                pItem=0.8,
                                pFeature=1,
                                title="consensus cluster",
                                clusterAlg="hc",
                                distance="pearson")

# Plots
icl = calcICL(cluster,title="cluster") 

# Retreive information for k=2 (most informative clusterisation)
cluster[[2]][["consensusClass"]]
##    X01.13.3577_.HTA.2_0..CEL    X02.13.3578_.HTA.2_0..CEL 
##                            1                            1 
##  X03.13.3580.2_.HTA.2_0..CEL    X04.13.3582_.HTA.2_0..CEL 
##                            2                            2 
##    X05.13.3623_.HTA.2_0..CEL  X06.13.3627.1_.HTA.2_0..CEL 
##                            1                            1 
##    X07.13.3633_.HTA.2_0..CEL  X08.13.6393.3_.HTA.2_0..CEL 
##                            2                            2 
##    X09.13.6394_.HTA.2_0..CEL X10.13.12026.2_.HTA.2_0..CEL 
##                            1                            2 
##   X11.13.13283_.HTA.2_0..CEL     X12.16.082_.HTA.2_0..CEL 
##                            2                            2

### Statistical analysis with linear regression
library(limma)
fit <- lmFit(eset_array, design)
cont.matrix <- makeContrasts(cluster1vscluster2=cluster1-cluster2, levels=design) 
fit <- contrasts.fit(fit, cont.matrix)
fit <- eBayes(fit)
dim(topTable(fit, number = "inf", p.value = 0.05))
## [1] 3445    6